In [1]:
import pandas as pd, numpy as np
import kendo_romania
Read data
In [2]:
# One empty dict per year from 1993 to 2018. The cells below fill
# matches[year][competition] with the records returned by the kendo_romania loaders.
matches={i:{} for i in range(1993,2019)}
In [3]:
# 2018 'CR' competition, read from the 'List of matches' sheet.
filename='rawdata/2018/CR/CR25 - Public.xlsx'
sheetname='List of matches'
# column_keys names the fields of a match record (match type, the two
# competitors 'aka'/'shiro', outcome, the three 'shinpan') and gives a number
# for each -- presumably the spreadsheet column index; TODO confirm against
# kendo_romania.get_matches_from_list.
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
# NOTE(review): the trailing positional 3 looks like a number of rows to skip -- confirm.
matches[2018]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3)
In [5]:
filename='rawdata/2018/SL/Prezenta SL_WKC17.xlsx'
sheetname=['F','M']
matches[2018]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,5)
In [17]:
filename='rawdata/2018/CN/Event management CN25.xlsx'
sheetname='Shiai'
column_keys={'match_type':3,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
shift=-1
matches[2018]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
In [18]:
matches[2018]['CN'][-13]
Out[18]:
In [19]:
categories=['Individual masculin','Echipe']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
shift=0
matches[2017]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [20]:
categories=['Individual juniori mici','Individual juniori mari','Individual feminin']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CN']=matches[2017]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [21]:
categories=['Individual masculin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2017]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [22]:
categories=['Individual juniori','Individual veterani','Individual feminin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CR']=matches[2017]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [23]:
categories=['Echipe']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=0
matches[2017]['CR']=matches[2017]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [24]:
filename='rawdata/2017/SL/Prezenta.xlsx'
sheetname=['F','M','J']
matches[2017]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)
In [25]:
filename='rawdata/2016/SL/Event management - stagiul 4.xlsx'
sheetname=['F','M']
matches[2016]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)
In [26]:
sheetname=['J']
matches[2016]['SL']=matches[2016]['SL']+\
kendo_romania.get_matches_from_table(filename,sheetname,5)
In [27]:
categories=['Individual masculin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2016]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [28]:
categories=['Individual feminin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [29]:
categories=['Echipe','Male team']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=0
matches[2016]['CN']=matches[2016]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [30]:
categories=['Junior 1 individual','Junior 2 individual']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)
In [31]:
filename='rawdata/2016/CR/Event management_CR23.2016.xlsx'
sheetname=['IF_m','IJ_m','IM_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2016]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
sheetname=['EJ_m','ES_m']
matches[2016]['CR']=matches[2016]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
In [ ]:
filename='rawdata/2015/SL/Event management - stagiul 5.xlsx'
sheetname=['SF_s','SM_s']
matches[2015]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)
In [ ]:
filename='rawdata/2015/CN/Event management_CN22.2015.xlsx'
sheetname=['IF_m','IJ2_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
sheetname='E_m'
matches[2015]['CN']=matches[2015]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
In [ ]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IF_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IJ1_s']
matches[2015]['CR']=matches[2015]['CR']+\
kendo_romania.get_matches_from_table(filename,
sheetname,skiprows=7,shift=1,nrows=9)
In [ ]:
# Sheet 'IJ2_s' is read twice with the same shift/nrows; only skiprows differs.
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IJ2_s']
for skip in (8,16):
    matches[2015]['CR']=matches[2015]['CR']+kendo_romania.get_matches_from_table(
        filename,sheetname,skiprows=skip,shift=12,nrows=8)
In [ ]:
sheetname=['IM_s']
column_keys={'match_type':19,'aka':{'name':20,'point1':21},
'shiro':{'name':24,'point1':23},'outcome':22}
shift=0
matches[2015]['CR']=matches[2015]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=10
matches[2015]['CR']=matches[2015]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
In [ ]:
filename='rawdata/2014/SL/Lista de participanti 6.xlsx'
sheetname=['SF_s','SM_s','J_s']
matches[2014]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)
In [ ]:
filename='rawdata/2014/CR/Event management_CR21.2014.xlsx'
sheetname=['IC-10_m','IC_m','IJ_m','IS_m','IF_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2014]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
sheetname=['IM_s']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=8
matches[2014]['CR']=matches[2014]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)
In [ ]:
filename='rawdata/2014/CN/Event management_CN21.2014 - v2.xlsx'
sheetname=['IF_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2014]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
sheetname=['IM_s']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=19
matches[2014]['CN']=matches[2014]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=29
matches[2014]['CN']=matches[2014]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
In [ ]:
sheetname=['IJ1_s']
matches[2014]['CN']=matches[2014]['CN']+\
kendo_romania.get_matches_from_table(filename,sheetname,7,shift=1,nrows=10)
In [ ]:
# Sheet 'IJ2_s' is read three times; only the third positional argument changes.
sheetname=['IJ2_s']
for skip in (8,14,20):
    matches[2014]['CN']=matches[2014]['CN']+kendo_romania.get_matches_from_table(
        filename,sheetname,skip,shift=12,nrows=6)
In [ ]:
filename='rawdata/2013/CN/Event management_CN2013.xlsx'
sheetname=['IS_m','IF_m','IC_m','IJ_m','E_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
filename='rawdata/2013/CR/Event management_CR2013.xlsx'
sheetname=['IF_meciuri','IJ_meciuri','IM_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
filename='rawdata/2013/SL/Event management.xlsx'
sheetname=['E_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['SL']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
sheetname=['Schema feminin']
matches[2013]['SL']=matches[2013]['SL']+\
kendo_romania.get_matches_from_table(filename,sheetname,2,nrows=14)
sheetname=['Schema juniori']
matches[2013]['SL']=matches[2013]['SL']+\
kendo_romania.get_matches_from_table(filename,sheetname,2,nrows=12)
In [ ]:
filename='rawdata/2012/CN/Event management CN2012.xlsx'
sheetname=['E_meciuri','IJ_meciuri','IF_meciuri','IM_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':6,'point1':5},'outcome':3,
'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2012]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
filename='rawdata/2012/CR/2012.05.05-06 - CR - Cluj.xlsx'
sheetname=['IC']
matches[2012]['CR']=kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,12,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,18,shift=1,nrows=4)
In [ ]:
# Sheet 'IJ' is read five times with shift=1 and nrows=3; only the third
# positional argument changes between calls.
sheetname=['IJ']
for skip in (14,19,24,30,35):
    matches[2012]['CR']=matches[2012]['CR']+kendo_romania.get_matches_from_table_oneliner(
        filename,sheetname,skip,shift=1,nrows=3)
In [ ]:
sheetname=['IF']
matches[2012]['CR']=matches[2012]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,13,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,18,shift=1,nrows=3)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2012]['CR']=matches[2012]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,22,shift=shift)
In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=6
matches[2012]['CR']=matches[2012]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
In [ ]:
# Sheet 'ES' is read three times with the same column layout; only `shift`
# changes. `shift` is left at 9 afterwards, exactly as in the unrolled version.
sheetname=['ES']
column_keys={'match_type':20,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
for shift in (-1,4,9):
    matches[2012]['CR']=matches[2012]['CR']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,4,shift=shift)
In [ ]:
filename='rawdata/2011/CN/2011.11.26-27 - CN - Bucuresti_print.xlsx'
sheetname=['IJ']
matches[2011]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,13,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,18,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,23,shift=1)
In [ ]:
sheetname=['IF']
matches[2011]['CN']=matches[2011]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,13,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,18,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,23,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2011]['CN']=matches[2011]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,28,shift=shift)
In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2011]['CN']=matches[2011]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2011]['CN']=matches[2011]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
In [ ]:
# Sheet 'E' is read three times with the same column layout; only `shift`
# changes. `shift` is left at 29 afterwards, as before.
sheetname=['E']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
for shift in (17,23,29):
    matches[2011]['CN']=matches[2011]['CN']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,5,shift=shift)
In [ ]:
filename='rawdata/2011/CR/2011.04.16-17 - CR - Miercurea Ciuc.xlsx'
sheetname=['ES']
column_keys={'match_type':6,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2011]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=5
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=11
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
In [ ]:
sheetname=['IF']
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,15,shift=1,nrows=4)
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,21,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,26,shift=shift)
In [ ]:
# Sheet 'IJ' is read three times with shift=1; each pair below is
# (third positional argument, nrows) for one call.
sheetname=['IJ']
for skip,rows in ((16,3),(21,4),(27,3)):
    matches[2011]['CR']=matches[2011]['CR']+kendo_romania.get_matches_from_table_oneliner(
        filename,sheetname,skip,shift=1,nrows=rows)
In [ ]:
sheetname=['IC']
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,4,shift=0,nrows=4)
In [ ]:
sheetname=['EJ']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=0
matches[2011]['CR']=matches[2011]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,15,shift=shift)
In [ ]:
filename='rawdata/2010/CR/2010.03.27-28 - CR - Budeasa.xlsx'
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2010]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2010]['CR']=matches[2010]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
In [ ]:
sheetname=['IF']
matches[2010]['CR']=matches[2010]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,15,shift=1,nrows=4)
matches[2010]['CR']=matches[2010]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,21,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2010]['CR']=matches[2010]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,26,shift=shift)
In [ ]:
sheetname=['EJ']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=0
matches[2010]['CR']=matches[2010]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,15,shift=shift)
In [ ]:
# Sheet 'IJ' is read three times with shift=1; each pair below is
# (third positional argument, nrows) for one call.
sheetname=['IJ']
for skip,rows in ((16,3),(21,4),(27,3)):
    matches[2010]['CR']=matches[2010]['CR']+kendo_romania.get_matches_from_table_oneliner(
        filename,sheetname,skip,shift=1,nrows=rows)
In [ ]:
sheetname=['IC']
matches[2010]['CR']=matches[2010]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,4,shift=0,nrows=4)
In [ ]:
filename='rawdata/2010/CN/2010.11.27-28 - CN - Bucuresti.xlsx'
sheetname=['IJ']
matches[2010]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,13,shift=1,point_shift=0,nrows=5)
In [ ]:
sheetname=['IC']
matches[2010]['CN']=matches[2010]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,13,shift=1,nrows=3)
matches[2010]['CN']=matches[2010]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,18,shift=1,nrows=3)
In [ ]:
sheetname=['IF']
matches[2010]['CN']=matches[2010]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,13,shift=1,nrows=3)
matches[2010]['CN']=matches[2010]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,18,shift=1,nrows=3)
In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=6
matches[2010]['CN']=matches[2010]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
shift=12
matches[2010]['CN']=matches[2010]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
In [ ]:
# Sheet 'E' is read three times with the same column layout; only `shift`
# changes. `shift` is left at 11 afterwards, as before.
sheetname=['E']
column_keys={'match_type':15,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
for shift in (-1,5,11):
    matches[2010]['CN']=matches[2010]['CN']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,5,shift=shift)
In [ ]:
filename='rawdata/2009/CN/2009.11.28-29 - CN - Bucuresti.xlsx'
sheetname=['IJ']
matches[2009]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,4,shift=0,nrows=4)
In [ ]:
sheetname=['IF']
matches[2009]['CN']=matches[2009]['CN']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,12,shift=1,point_shift=0,nrows=5)
In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2009]['CN']=matches[2009]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2009]['CN']=matches[2009]['CN']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
In [ ]:
# Sheet 'ES' is read three times with the same column layout; only `shift`
# changes. `shift` is left at 11 afterwards, as before.
sheetname=['ES']
column_keys={'match_type':1,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
for shift in (-1,5,11):
    matches[2009]['CN']=matches[2009]['CN']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,7,shift=shift)
In [ ]:
filename='rawdata/2009/CR/2009.04.04 - CR - Budeasa - print.xlsx'
sheetname=['IJ']
matches[2009]['CR']=kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,12,shift=1,point_shift=0,nrows=5)
In [ ]:
sheetname=['IF']
matches[2009]['CR']=matches[2009]['CR']+\
kendo_romania.get_matches_from_table_oneliner(filename,
sheetname,13,shift=1,point_shift=0,nrows=6)
In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2009]['CR']=matches[2009]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2009]['CR']=matches[2009]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
In [ ]:
sheetname=['ES']
column_keys={'match_type':1,'aka':{'name':1,'point1':2},
'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2009]['CR']=matches[2009]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)
shift=5
matches[2009]['CR']=matches[2009]['CR']+\
kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)
Clean up points, matches, player names
In [ ]:
def match_cleaner(year,match):
    """Classify a raw match label into ``(category, teams, phase)``.

    Parameters
    ----------
    year : int
        Competition year. For years before 2014, and for 2018, the
        'Senior' category is reported as 'Men'.
    match : str
        Either ``'<source>#<stage>'`` or a bare sheet code
        (``'F'``, ``'M'``, ``'J'``, ``'SF_s'``, ``'SM_s'``, ``'J_s'``).
        With a ``'#'``, the part before it is lower-cased and searched for
        file-name style markers; the part after it is searched for phase
        and category codes.

    Returns
    -------
    tuple of (str, str, str)
        ``category`` (e.g. 'Men', 'Women', 'Junior'), ``teams``
        ('Individual' or 'Team') and ``phase`` ('Pool', 'Prelim.',
        'Finals'). Each element is 'Unknown' when it cannot be determined.
    """
    # Substring markers for the phase, tried in order; the first hit wins.
    # Lower-case 'pool' comes last because every other marker used to be
    # able to override it.
    phase_markers=(
        ('Pool','Pool'),('prel','Prelim.'),('Prel','Prelim.'),('layoff','Prelim.'),
        ('- F','Finals'),('F -','Finals'),('Final','Finals'),('SF','Finals'),
        ('QF','Finals'),('pool','Pool'))
    # A stage that is exactly one of these letters is a pool name.
    pool_letters='ABCDEFGHIJKLMNOPQRST'
    # Category codes searched in the stage part; the first hit wins.
    # NOTE(review): 'EF' -> "Men's Team" looks inconsistent with 'IF' ->
    # "Women's Individual"; kept as-is because the intended code is unclear.
    stage_kinds=(
        ('IS',"Senior's Individual"),('IF',"Women's Individual"),
        ('IM',"Men's Individual"),('IC',"Children's Individual"),
        ('IJ',"Junior's Individual"),('EJ',"Junior's Team"),
        ('EF',"Men's Team"),('ES',"Senior's Team"))
    # Markers searched in the lower-cased source part. Every entry is tried
    # and a later hit overrides an earlier one, including a stage_kinds hit.
    # The three 'individual ...' entries used to map to Team kinds
    # (copy-paste slip); they now map to the matching Individual kinds.
    source_kinds=(
        ('individual masculin.',"Men's Individual"),
        ('echipe.',"Mixed Team"),
        ('individual juniori',"Junior's Individual"),
        ('individual feminin',"Women's Individual"),
        ('individual veterani',"Senior's Individual"),
        ('male team',"Men's Team"),
        ('junior 1 individual',"Junior's Individual"),
        ('junior 2 individual',"Junior's Individual"))
    # Labels without '#' must match one of these sheet codes exactly.
    sheet_kinds={
        'F':"Women's Individual",'M':"Men's Individual",'J':"Junior's Individual",
        'SF_s':"Women's Individual",'SM_s':"Men's Individual",'J_s':"Junior's Individual"}

    kind,phase='Unknown','Unknown'
    if '#' in match:
        stage0,stage1=match.split('#')[:2]
        stage0=stage0.lower()
        for marker,label in phase_markers:
            if marker in stage1:
                phase=label
                break
        else:
            if len(stage1)==1 and stage1 in pool_letters:
                phase='Pool'
        for marker,label in stage_kinds:
            if marker in stage1:
                kind=label
                break
        for marker,label in source_kinds:
            if marker in stage0:
                kind=label
    else:
        kind=sheet_kinds.get(match,'Unknown')

    if kind=='Unknown':
        category=teams='Unknown'
    else:
        owner,teams=kind.split(' ')
        # Drops the trailing "'s". "Mixed" therefore becomes "Mix"; kept
        # unchanged because downstream code may already key on that value.
        category=owner[:-2]
    if year<2014 or year==2018:
        category=category.replace('Senior','Men')
    return category,teams,phase
Load names
In [ ]:
members=pd.read_csv('data/members_base.csv')
In [ ]:
members.head()
In [ ]:
# Raw names that cannot be abbreviated mechanically (mostly surname-only
# entries). name_cleaner looks the name up here after letter normalisation and,
# on a hit, abbreviates the mapped value instead of the raw name.
name_exceptions={'Atanasovski':'Atanasovski A. (MAC)',
'Dobrovicescu (SON)':'Dobrovicescu T. (SON)',
'Ianăș':'Ianăș F.',
'Crăciun (Tamang) Sujata':'Crăciun S.',
'Crăciun (Tamang) Sujata':'Crăciun S.',
'Dinu (Ioniță) Claudia-Andreea':'Dinu A.',
'Arabadjiyski': 'Arabadjiyski A.',
'Mandia':'Mandia F.',
'Stanev':'Stanev A.',
'Mochalov':'Mochalov O.',
'Sozzi':'Sozzi A.',
'Crăciunel':'Crăciunel I.',
'Craciunel':'Crăciunel I.',
'Sagaev':'Sagaev L.',
'Buzás':'Búzás C.',
'Csala':'Csala D.',
'Dimitrov':'Dimitrov M.',
'Józsa':'Józsa L.',
'Creangă':'Creangă A.',
'Duțescu':'Duțescu M.',
'Furtună':'Furtună G.',
'Gârbea':'Gârbea I.',
'Stupu':'Stupu I.',
'Mahika-Voiconi':'Mahika-Voiconi S.',
'Mahika':'Mahika-Voiconi S.',
'Stanciu':'Stanciu F.',
'Vrânceanu':'Vrânceanu R.',
'Wolfs':'Wolfs J.',
'Ducarme':'Ducarme A.',
'Sbârcea':'Sbârcea B.',
'Mocian':'Mocian A.',
'Hatvani':'Hatvani L.',
'Dusan':'Dusan N.',
'Borota':'Borota V.',
'Tsushima':'Tsushima K.',
'Tráser':'Tráser T.',
'Colțea':'Colțea A.',
'Brîcov':'Brîcov A.',
'Yamamoto':'Yamamoto M.',
'Crăciun':'Crăciun D.'}
# Cell values that are not person names (placeholders, club names, table
# headers, score summaries). name_ok rejects a value that is exactly equal to
# one of these entries.
redflags_names=['-','—','—',np.nan,'. ()','— ','- -.','- -. (-)',
'Kashi','Sankon','București','Victorii:','Sakura','Taiken','Ikada','Sonkei','CRK','Museido',
'Ichimon','Bushi Tokukai 1','Competitori – Shiai-sha','Echipa - roşu','Numele şi prenumele',
'Victorii:','Victorii: 0','Victorii: 1','Victorii: 2','Victorii: 3','Victorii: 4',
'Victorii: 5','?','Kyobukan','2/5','2/6','3/8','Finala','Kyobukan (0/0/0)','―',
'(clasament final după meci de baraj)','CRK (Bucuresti)','Kaybukan','Isshin (Cluj)',
'Ikada (Bucureşti)','Kyobukan (Braşov)','Puncte:','KASHI','Budoshin','Isshin',
'— (—)','4. B.','4. Baraj: Stupu M - Hostina','4. Baraj: Moise KM - Korenschi M',
'Bushi Tokukai (2/8/17)','CRK 2 (1/6/14)', 'CRK 2','CRK 1','Loc I.:',
'Bushi Tokukai 2 (M Ciuc)','Echipa suport']
# Substrings that disqualify a value: name_ok rejects any value that contains
# one of these anywhere in the text.
redflags_names2=['Bushi Tokukai','Eliminatoriu','finala','Finala','Fianala','Ikada','Ichimon','Pool',
'Locul ','Lotul ','Loc ','Grupa ','Isshin','Meciul ','Victorii:']
# Corrections applied by name_cleaner to the abbreviated 'Surname I.' form:
# key = abbreviation as produced from the raw spelling, value = abbreviation to
# use instead (missing diacritics, misspellings, wrong initial).
name_equals={'Chirea M.':'Chirea A.',
'Ghinet C.':'Ghineț C.',
'Domnița M.':'Domniță M.',
'Garbea I.':'Gârbea I.',
'Ah-hu W.':'Ah-hu S.',
'Horvát M.':'Horváth M.',
'Ionita A.':'Ioniță A.',
'Medvedschi I.':'Medvețchi I.',
'Mahika S.':'Mahika-Voiconi S.',
'Mate L.':'Máté L.',
'Hentea L.':'Hentea A.',
'Stupu I.':'Stupu A.',
'Ah-Hu S.':'Ah-hu S.',
'Alexa I.':'Alexa A.',
'Angelescu M.':'Angelescu M.',
'Apostu D.':'Apostu T.',
'Brâcov A.':'Brîcov A.',
'Catoriu D.':'Cantoriu D.',
'Călina A.':'Călina C.',
'Buzás C.':'Búzás C.',
'Korenshi E.':'Korenschi E.',
'Pleșa R.':'Pleșea R.',
'Galos A.':'Galoș A.',
'Győrfi G.':'Györfi G.',
'Győrfi S.':'Györfi S.',
'Ghineț G.':'Ghineț C.',
'Hostina E.':'Hoștină E.',
'Hostină E.':'Hoștină E.',
'Ianăs F.':'Ianăș F.',
'Ianas F.':'Ianăș F.',
'Lacatus M.':'Lăcătuș M.',
'Máthé L.':'Máté L.',
'Burinaru A.':'Burinaru Al.',
'Nastase M.':'Năstase E.',
'Oprisan A.':'Oprișan A.',
'Pârlea A.':'Pîrlea A.',
'Sabau D.':'Sabău D.',
'Spriu C.':'Spiru C.',
'Bíró S.':'Biró S.',
'Stănculascu C.':'Stănculescu C.',
'Vrânceanu M.': 'Vrânceanu L.',
'Wasicek V.':'Wasicheck W.',
'Wasicsec W.':'Wasicheck W.',
'Wasicsek W.':'Wasicheck W.',
'Zolfoghari A.':'Zolfaghari A.'}
# People whose 'Surname I.' abbreviation would collide with someone else's.
# name_cleaner checks the raw input against these keys first and returns the
# mapped, longer abbreviation directly.
name_doubles={
'Cristea Cristina':'Cristea Cr.',
'Cristea Călin-Ștefan':'Cristea Că.',
'Sandu Marius-Cristian':'Sandu Mar.',
'Sandu Matei-Serban':'Sandu Mat.',
'Georgescu Andrei':'Georgescu An.',
'Georgescu Alexandra':'Georgescu Al.',
'Péter Csongor':'Péter Cso.',
'Péter Csanád':'Péter Csa.',
'Luca Mihnea':'Luca Mihn.',
'Luca Mihai-Cătălin':'Luca Miha.',
'Luca':'Luca Miha.',
'Luca M':'Luca Miha.',
'Luca M.':'Luca Miha.',
'Luca Mihai':'Luca Miha.',
'Luca Traian-Dan':'Luca Tr.',
'Luca Tudor':'Luca Tu.',
'Canceu Anamaria':'Canceu An.',
'Canceu Adriana-Maria':'Canceu Ad.',
'Cioată Daniel-Mihai':'Cioată M.',
'Cioată Dragoș':'Cioată D.',
'Burinaru Alexandra':'Burinaru Al.',
'Burinaru Andreea':'Burinaru An.',
'Dudaș Francisc Andrei':'Dudaș F.',
'Dudaș Francisc':'Dudaș F.'}
# Character replacements that name_cleaner applies to every name before any
# further lookup (each key is replaced by its value).
letter_norm={'ţ':'ț','ş':'ș','Ş':'Ș'}
def name_cleaner(name):
    """Reduce a raw competitor name to its canonical abbreviation.

    Steps, in order:

    1. If the raw ``name`` is a key of ``name_doubles`` its mapped value is
       returned unchanged.
    2. Every character in ``letter_norm`` is replaced by its mapped value.
    3. If the result is a key of ``name_exceptions`` the mapped value is
       used in its place.
    4. Anything from the first ``'('`` onwards is dropped and the rest is
       reduced to ``'<first word> <initial of second word>.'``.
    5. The abbreviation is passed through ``name_equals``.

    Words are split on any run of whitespace, so doubled or irregular
    spacing between surname and given name no longer yields an empty word.

    Parameters
    ----------
    name : str
        Raw name as found in a spreadsheet cell or the members table.

    Returns
    -------
    str
        The abbreviated name.

    Raises
    ------
    IndexError
        If, after steps 1-3, the name has fewer than two words.
    """
    if name in name_doubles:
        return name_doubles[name]
    for letter in letter_norm:
        name=name.replace(letter,letter_norm[letter])
    full_name=name_exceptions.get(name,name)
    # str.split() without arguments collapses repeated whitespace and never
    # returns empty words.
    words=full_name.split('(')[0].split()
    sname=words[0]+' '+words[1][0]+'.'
    sname=name_equals.get(sname,sname)
    if sname in name_doubles:
        # Dev aid: this abbreviation is also a key of name_doubles, so the
        # raw spelling probably deserves its own name_doubles entry.
        print(name,sname)
    return sname
In [ ]:
def name_ok(name):
    """Return True when ``name`` looks like a person's name.

    A value is rejected when it is not a string (NaN, None and numeric cells
    included), is empty or whitespace only, is the literal text ``'nan'``,
    is exactly equal to an entry of ``redflags_names``, or contains any
    entry of ``redflags_names2`` as a substring.

    Parameters
    ----------
    name : object
        Raw cell value.

    Returns
    -------
    bool
    """
    # Non-strings used to raise TypeError at the substring test below.
    # ``name == np.nan`` never matched, because NaN compares unequal to
    # everything, itself included.
    if not isinstance(name,str):
        return False
    if not name.strip() or name=='nan':
        return False
    if name in redflags_names:
        return False
    return not any(flag in name for flag in redflags_names2)
Standardize names
In [ ]:
# Walk every loaded match and index the competitors on both sides:
#   all_players_unsorted - every raw name seen, valid or not
#   all_players_r        - raw name -> cleaned abbreviation (first one kept)
#   all_players          - abbreviation -> year -> {'names': set of raw spellings}
all_players={}
all_players_r={}
all_players_unsorted=set()
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            for color in ['aka','shiro']:
                name=match[color]['name']
                all_players_unsorted.add(name)
                if not name_ok(name):
                    continue
                name=name_cleaner(name)
                rname=match[color]['name']
                all_players_r.setdefault(rname,name)
                per_year=all_players.setdefault(name,{})
                per_year.setdefault(year,{'names':set()})['names'].add(rname)
In [ ]:
# Group the full member names by their cleaned abbreviation:
#   name_linker: abbreviation -> set of full names that share it.
name_linker={}
for i in members.index:
    name=members.loc[i]['name']
    try:
        cname=name_cleaner(name)
    except Exception:
        # The name could not be abbreviated. Report it and file it under its
        # own raw value. Previously execution fell through with `cname` still
        # holding the previous member's abbreviation (or undefined on the
        # first row), which linked this name to the wrong person.
        print(name)
        cname=name
    if cname not in name_linker:name_linker[cname]=set()
    name_linker[cname].add(name)
In [ ]:
# Invert name_linker: full member name -> abbreviation.
names_abbr={}
for name,full_names in name_linker.items():
    if len(full_names)>1:
        # Dev aid only: lists abbreviations shared by several full names so
        # that exceptions for duplicate person names can be created.
        print(name,full_names)
    for i in full_names:
        names_abbr[i]=name
In [ ]:
# abbreviation of every member row, in members.index order
names_abbr_list=[]
# abbreviation -> member-list name (the last row for an abbreviation wins)
name_abbr2long={}
# abbreviation -> {year -> club} as recorded in the member list
name_abbr2club={}
for idx in members.index:
    row=members.loc[idx]
    abbr=names_abbr[row['name']]
    names_abbr_list.append(abbr)
    name_abbr2long[abbr]=row['name']
    name_abbr2club.setdefault(abbr,{})[row['year']]=row['club']
In [ ]:
# Store the abbreviations collected in names_abbr_list as a new column of members.
members['name_abbr']=names_abbr_list
In [ ]:
# canonical referee abbreviation -> list of match records they officiated
all_shinpan={}
# raw referee spelling -> canonical abbreviation (the first mapping is kept)
all_shinpan_r={}
# every raw referee cell seen on an accepted match
all_shinpan_unsorted=set()
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            if 'shinpan' not in match:
                continue
            aka=match['aka']['name']
            shiro=match['shiro']['name']
            # Whether both competitors are usable does not depend on the
            # referee role, so it is decided once per match, not once per role.
            if not (name_ok(aka) and name_ok(shiro)
                    and name_cleaner(aka) in all_players
                    and name_cleaner(shiro) in all_players):
                continue
            for color in ['fukushin1','shushin','fukushin2']:
                # Skip roles absent from this record, mirroring the guard used
                # when the master match table is assembled.
                if color not in match['shinpan']:
                    continue
                rname=match['shinpan'][color]
                all_shinpan_unsorted.add(rname)
                if name_ok(rname):
                    name=name_cleaner(rname)
                    all_shinpan.setdefault(name,[]).append(match)
                    all_shinpan_r.setdefault(rname,name)
In [ ]:
# Full names for referees that do not appear in the member list. Entries that
# map an abbreviation to itself have no known full name.
name_abbr2long_extends={
    'Ishikubo S.':'Ishikubo Shinichi',
    'Yamamoto M.':'Yamamoto M.',
    'Wolfs J.':'Wolfs Jan Claude',
    'Tsushima K.':'Tsushima Kanji',
    'Ducarme A.':'Ducarme Alain',
    'Tráser T.':'Tráser Tamás',
    'Borota B.':'Borota B.',
    'Arabadjiyski A.':'Arabadjiyski Alexandar',
    'Csala T.':'Csala Tibor',
    'Sagaev L.':'Sagaev Lubomir',
    'Hatvani L.':'Hatvani Lóránt',
    'Dusan N.':'Dusan N',
    'Borota V.':'Borota Vladimir',
    'Mandia F.':'Mandia Fabrizio',
    'Stanev A.':'Stanev A.',
    'Mochalov O.':'Mochalov O.',
    'Sozzi A.':'Sozzi A.',
    'Dimitrov M.':'Dimitrov M.'
}
for i in all_shinpan:
    if i not in name_abbr2long:
        if i not in name_abbr2long_extends:
            # A referee missing from the table used to abort the cell with a
            # KeyError. Report it and fall back to the abbreviation, which is
            # what the self-mapped entries above already do.
            print(i,'missing from name_abbr2long_extends')
        name_abbr2long[i]=name_abbr2long_extends.get(i,i)
Infer clubs
In [ ]:
#naive infer
# Bracket contents that are not club codes.
redflags_clubs=['','N/A','RO1','RO2']
# Alternative spellings of a club / country code -> the code used downstream.
club_equals={'MLD':'MOL/Md',
    'MOL':'MOL/Md',
    'IKD':'IKA',
    'HUN':'HUN/Hu',
    'BUL':'BUL/Bg',
    'TUR':'TUR/Tr',
    'MAC':'MAC/Mc',
    'MNE':'MNE/Mn',
    'SRB':'SRB/Sr',
    'ITA':'ITA/It',
    'ISS':'ISH',
    'Musso, Bg':'MUS/Bg',
    'Makoto, Sr':'MAK/Sr',
    'Szeged, Hu':'SZE/Hu'}
for name in all_players:
    for year in all_players[name]:
        # Iterate in sorted order: the iteration order of a set of strings
        # changes between interpreter sessions, and the last spelling that
        # carries a club wins, so unsorted iteration made the result vary
        # from run to run.
        for name_form in sorted(all_players[name][year]['names']):
            if '(' in name_form:
                # Take what lies between the first '(' and the next ')'.
                # Blindly cutting the last character lost a letter whenever
                # the closing bracket was missing or followed by a space.
                club=name_form.split('(',1)[1].split(')')[0].strip()
                if club in club_equals: club=club_equals[club]
                if club not in redflags_clubs:
                    all_players[name][year]['club']=club
In [ ]:
# Fill a missing club from the nearest earlier year that has one, or failing
# that from the nearest later year.
for name,seasons in all_players.items():
    for year,info in seasons.items():
        if 'club' in info:
            continue
        #more than 1 year?
        if len(seasons)<=1:
            continue
        first_year,last_year=min(seasons),max(seasons)
        #get club from previous year
        for y in range(year-1,first_year-1,-1):
            if 'club' in seasons.get(y,{}):
                info['club']=seasons[y]['club']
                break
        #if still not found, get club from next year
        if 'club' not in info:
            for y in range(year+1,last_year+1):
                if 'club' in seasons.get(y,{}):
                    info['club']=seasons[y]['club']
                    break
In [ ]:
# Give every player without a member-list name a full name taken from the
# spellings seen in the match sheets.
for name in all_players:
    if name not in name_abbr2long:
        #infer using longest available name
        # Sorted so that a tie in length always resolves to the same
        # spelling; set iteration order differs between sessions.
        forms=sorted({j for i in all_players[name] for j in all_players[name][i]['names']})
        if len(forms)>0:
            # Keep only the part before a "(club)" suffix. Splitting and
            # stripping works whatever the spacing before the bracket, where
            # the former find('(')-1 slice assumed exactly one space.
            inferred_name=max(forms,key=len).split('(')[0].strip()
            print(name,inferred_name)
            name_abbr2long[name]=inferred_name
        else:
            print(name,all_players[name])
In [ ]:
# Another pass over player-years that still lack a club, this time consulting
# the member-list clubs in name_abbr2club before falling back to the player's
# other tournament years.
# NOTE(review): the exported cell lost its indentation. The `elif len(years)>1`
# branch is assumed to belong to the member-list if/elif chain (year listed /
# year before the first listed year / otherwise) -- confirm against the
# original notebook.
for name in all_players:
    years=np.sort(list(all_players[name].keys()))
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #get from list
            if name in name_abbr2club:
                minyear=min(name_abbr2club[name].keys())
                if year in name_abbr2club[name]:
                    # the member list has a club for exactly this year
                    all_players[name][year]['club']=name_abbr2club[name][year]
                elif year<minyear:
                    # before the first listed year: reuse the earliest listed club
                    all_players[name][year]['club']=name_abbr2club[name][minyear]
                elif len(years)>1:
                    #get club from previous year
                    # ascending scan with overwrite: the closest earlier year wins
                    for y in range(years[0],year):
                        if y in all_players[name]:
                            if 'club' in all_players[name][y]:
                                all_players[name][year]['club']=all_players[name][y]['club']
                    #if still not found, get club from next year
                    if 'club' not in all_players[name][year]:
                        #get club from next year
                        # descending scan with overwrite: the closest later year wins
                        for y in np.arange(years[-1],year,-1):
                            if y in all_players[name]:
                                if 'club' in all_players[name][y]:
                                    all_players[name][year]['club']=all_players[name][y]['club']
Interpolate missing years for members
In [ ]:
# Hand-maintained club code per player abbreviation; consulted for player-years
# that still have no club after the automatic inference passes.
clubs_manual={
    'Balázs-Kercsó Z.':'BTK',
    'Nagy V.':'ISH',
    'Goró L.':'BTK',
    'Ghineț G.':'YUK',
    'Cioată E.':'KAS',
    'Leat M.':'IKA',
    'Perianu S.':'KNS',
    'Ah-hu S.':'ICH',
    'Preda A.':'CRK',
    'Salló Z.':'BTK',
    'András Z.':'BTK',
    'Neagu F.':'IKA',
    'Bódi Z.':'KYO',
    'Bumbu D.':'ISH',
    'Botean A.':'ISH',
    'Moldoveanu M.':'ISH',
    'Jeszenszki T.':'BTK',
    'Suru N.':'SAM',
    'Balázs S.':'BTK',
    'Perdi L.':'ISH',
    'Oprișan A.':'IKA',
    'Horváth D.':'BTK',
    'Sandache I.':'BTK',
    'Moise T.':'KAY',
    'Angelescu M.':'SAM',
    'Bărbulescu E.':'MUS',
    'Canceu A.':'KAS',
    'Crișan E.':'ISH',
    'Duicu T.':'KAS',
    'Dumbravă L.':'ISH',
    'Iordan R.':'IKA',
    'Jianu A.':'MUS',
    'Keresztes M.':'BTK',
    'Macavei I.':'KYO',
    'Mitelea C.':'ICH',
    'Pavel A.':'IKA',
    'Pienaru S.':'ISH',
    'Szikszai M.':'BTK',
    'Tamang S.':'SAM',
    'Tiron L.':'KNS',
    'Turdean S.':'KAS',
    'Wasicheck W.':'ISH',
    'Ștefan C.':'IKA'
}
In [ ]:
# Last resort for player-years without a club: the manual table, else the
# placeholder 'XXX'. Names that needed the placeholder are collected.
club_errors=[]
for name,seasons in all_players.items():
    for info in seasons.values():
        if 'club' in info:
            continue
        #if still not found, print error, infer other way
        info['club']=clubs_manual.get(name,'XXX')
        if name not in clubs_manual:
            club_errors.append(name)
In [ ]:
# Display the distinct names collected in club_errors.
set(club_errors)
In [ ]:
# club code -> {year -> set of player abbreviations}
clubs={}
for name,seasons in all_players.items():
    for year,info in seasons.items():
        clubs.setdefault(info['club'],{}).setdefault(year,set()).add(name)
In [ ]:
def outcome_cleaner(outcome):
    """Return True when the outcome cell is exactly 'E', False otherwise."""
    return bool(outcome=='E')
In [ ]:
def outcome_from_points(aka,shiro):
    """Derive the winner code and the point difference from two point counts.

    Returns a ``(winner, difference)`` pair where ``winner`` is 'A' when aka
    has more points, 'S' when shiro has more and 'X' when they are level.
    ``difference`` is always a string; a draw used to return the integer 0
    while wins returned strings, leaving a mixed-type column downstream.
    """
    if aka==shiro:
        return 'X','0'
    if aka>shiro:
        return 'A',str(aka-shiro)
    return 'S',str(shiro-aka)
In [ ]:
# Text fragments showing that a "points" cell is really a header cell.
redflags_points=['Puncte']
def point_clean1(point):
    """Normalise the notation of a single point character.

    '○' and '1' become 'O', 'I' becomes 'H', and the marks '×', '–', '—'
    and '?' are removed. Replacements run in the order listed below.
    """
    for old,new in (('○','O'),('I','H'),('×',''),('–',''),('1','O'),('—',''),('?','')):
        point=point.replace(old,new)
    return point
def points_cleaner(points):
    """Split a concatenated points string into its parts.

    Returns ``(point1, point2, count, hansoku)``:
      * ``point1`` / ``point2`` -- the first and second point character after
        ``point_clean1`` ('' when absent);
      * ``count`` -- number of characters left once the hansoku markers are
        removed and the string is stripped (never more than 2);
      * ``hansoku`` -- 1 when a '∆', '▲' or '(Ht)' marker was present, else 0.

    '(victorie)' is treated as two 'O' points. A string with more than two
    point characters is reported with an 'error' print and cut to its first
    two characters; previously that case printed and then crashed because
    ``point1`` and ``point2`` were never assigned.
    """
    hansoku=0
    for marker in ('∆','▲','(Ht)'):
        if marker in points:
            hansoku=1
            points=points.replace(marker,'')
    if '(victorie)' in points:
        points=points.replace('(victorie)','OO')
    points=points.strip()
    if len(points)>2:
        print(points,'error')
        points=points[:2]
    # slicing yields '' for a missing position instead of raising
    point1=point_clean1(points[0:1])
    point2=point_clean1(points[1:2])
    return point1,point2,len(points),hansoku
In [ ]:
def club_cleaner(club):
    """Split a 'CLUB/Cc' code into ``(club, COUNTRY)``.

    The country part is upper-cased. A code without '/' is returned
    unchanged, paired with the country 'RO'.
    """
    pieces=club.split('/')
    if len(pieces)==1:
        return club,'RO'
    return pieces[0],pieces[1].upper()
In [ ]:
# Display names keyed by club code (three letters) or country code (two
# letters). The literal used to list 'SR' twice; the duplicate is removed.
pretty_clubs={'ARA':'Arashi', 'BSD':'Bushido', 'BTK':'Bushi Tokukai', 'BG':'Bulgaria',
    'CDO':'Coroan de Oțel', 'CRK':'Clubul Român de Kendo', 'HAR':'Hargita',
    'ICH':'Ichimon', 'IKA':'Ikada','ISH':'Ishhin', 'IT':'Italy','HU':'Hungary',
    'KAS':'Kashi', 'KNS':'Kenshin', 'KYO':'Kyobukan', 'MC':'Macedonia',
    'SR':'Serbia', 'MN':'Montenegro', 'MD':'Moldova', 'MUS':'Museido',
    'RON':'Ronin-do', 'SAK':'Sakura', 'SAM':'Sam-sho','SAN':'Sankon', 'SBK':'Sobukan',
    'SON':'Sonkei', 'TAI':'Taiken', 'TR':'Turkey', 'XXX':'Unknown',
    'YUK':'Yu-kai','KAY':'Kaybukan'}
def pretty_club(club, country):
    """Return the display name for a club.

    When ``country`` is 'RO' the club code is looked up; for any other
    country the country's display name is returned instead.
    Raises KeyError when the relevant code is not in ``pretty_clubs``.
    """
    key=club if country=='RO' else country
    return pretty_clubs[key]
In [ ]:
# One record per year, from first to last appearance, for every competitor
# whose abbreviation is absent from the member list.
unregistered_members=[]
# Built once: rebuilding this set for every player made the loop quadratic.
registered_abbrs=set(members['name_abbr'].values)
for name in all_players:
    if name in registered_abbrs:
        continue
    first_year=min(all_players[name])
    last_year=max(all_players[name])
    for year in range(first_year,last_year+1):
        # years without a match record borrow the club of the last recorded year
        iyear=year if year in all_players[name] else last_year
        club,country=club_cleaner(all_players[name][iyear]['club'])
        if country=='RO':
            activ='Inactiv'
            dan=0
        else:
            activ=''
            dan=''
        unregistered_members.append({'name':name_abbr2long[name],
            'club':club,'active':activ,'year':year,'dan':dan,'country':country,
            'pretty_club':pretty_club(club,country)})
In [ ]:
# Every row of the member list gets the country code 'RO'.
members['country']='RO'
In [ ]:
# Append the records from unregistered_members below the member list.
members2=pd.concat([members,pd.DataFrame(unregistered_members)])
Appears in a competition but is missing from that year's member list (mu-dan)
In [ ]:
# For every competitor who fought in a year for which members2 has no row
# under their name, clone their earliest members2 row(s) and stamp them with
# that year.
members_mu_dan_extensions=[]
members_by_name=members2.set_index(['name'])
for year in matches:
    # A boolean mask works for any year: .set_index('year').loc[year] raised
    # KeyError for a year without member rows and returned a Series (not a
    # frame) for a year with exactly one row. A set also makes the lookups
    # below constant-time.
    names_this_year=set(members2.loc[members2['year']==year,'name'])
    # names already extended for this year; every further match of the same
    # person would append an identical frame
    already_extended=set()
    for competition in matches[year]:
        print(year,competition)
        for k in matches[year][competition]:
            aka=k['aka']['name']
            shiro=k['shiro']['name']
            if not (name_ok(aka) and name_ok(shiro)
                    and name_cleaner(aka) in all_players
                    and name_cleaner(shiro) in all_players):
                continue
            for a in ['aka','shiro']:
                name=k[a]['name']
                rname=name_abbr2long[all_players_r[name]]
                if rname in names_this_year or rname in already_extended:
                    continue
                already_extended.add(rname)
                dummy=members_by_name.loc[[rname]]
                dummy=dummy[dummy['year']==min(dummy['year'])]
                dummy=dummy.reset_index()
                dummy['year']=year
                members_mu_dan_extensions.append(dummy)
In [ ]:
# Add the per-year rows collected in members_mu_dan_extensions to members2.
members3=pd.concat([members2,pd.concat(members_mu_dan_extensions)])
In [ ]:
# errors='ignore' keeps this cell re-runnable: once the column is gone (or if
# it never existed) an unconditional drop raises KeyError.
members3=members3.drop(columns='Unnamed: 0',errors='ignore').drop_duplicates()
In [ ]:
# Write the completed member table to disk.
members3.to_csv('data/members.csv')
In [ ]:
# Build one flat record per usable match: metadata, referees, and for each
# side the name, club, country, dan, display club and cleaned points.
# NOTE(review): the exported cell lost its indentation; the nesting below is
# reconstructed from the statements' data dependencies -- confirm against the
# original notebook.
master_matches=[]
for year in matches:
    # member rows of this year, used for the dan lookup below
    members_by_year=members3.set_index(['year']).loc[year].drop_duplicates()
    for competition in matches[year]:
        print(year,competition)
        for k in matches[year][competition]:
            # cleared as soon as anything disqualifies the record
            good=True
            match={'year':year,'competition':competition}
            # match_cleaner is defined elsewhere in the notebook
            match['match_category'],match['match_teams'],match['match_phase']=match_cleaner(year,k['match_type'])
            if 'shinpan' in k:
                for color in ['fukushin1','shushin','fukushin2']:
                    if color in k['shinpan']:
                        # only referees whose raw name is in all_shinpan_r are recorded
                        if k['shinpan'][color] in all_shinpan_r:
                            match[color]=name_abbr2long[all_shinpan_r[k['shinpan'][color]]]
            aka=k['aka']['name']
            shiro=k['shiro']['name']
            if (name_ok(aka)) and\
               (name_ok(shiro)) and\
               (name_cleaner(aka) in all_players) and\
               (name_cleaner(shiro) in all_players):
                for a in ['aka','shiro']:
                    # every non-name field of this side is concatenated into one string
                    points=''
                    for h in k[a]:
                        if h=='name':
                            name=k[a][h]
                            match[a+' name']=name_abbr2long[all_players_r[name]]
                            club, country=club_cleaner(all_players[all_players_r[name]][year]['club'])
                            match[a+' club'], match[a+' country']=club, country
                            # NOTE(review): presumably a scalar; .loc would return
                            # several values if the name occurs more than once
                            # in this year -- verify
                            match[a+' dan']=members_by_year.set_index(['name']).\
                                loc[match[a+' name']]['dan']
                            match[a+' pretty_club']=pretty_club(club, country)
                        else:
                            point=k[a][h]
                            # empty spreadsheet cells arrive as NaN
                            if str(point)=='nan': point=''
                            points=points+point
                    for redflag in redflags_points:
                        if redflag in points:
                            good=False
                    # `good` is not reset between sides, so a flag raised for
                    # aka also skips the points of shiro
                    if good:
                        match[a+' point1'],match[a+' point2'],match[a+' points'],match[a+' hansoku']=points_cleaner(points)
            else:
                # at least one competitor name is unusable: drop the record
                good=False
            if good:
                if 'outcome' in k:
                    match['encho']=outcome_cleaner(k['outcome'])
                else:
                    match['encho']=False
                match['winner'],match['difference']=outcome_from_points(match['aka points'],match['shiro points'])
                master_matches.append(match)
In [ ]:
# One row per record collected in master_matches.
data=pd.DataFrame(master_matches)
Cleanup
In [ ]:
# Turn the 0/1 hansoku flags of both sides into '' / 'Δ'.
for side in ('aka','shiro'):
    column=side+' hansoku'
    data[column]=data[column].replace(0,'').replace(1,'Δ')
In [ ]:
# Write the match table to disk.
data.to_csv('data/matches.csv')
Group by player
In [ ]:
# The aka side's view of every match: shiro-specific columns are dropped and
# the 'aka ' prefix is removed from the remaining ones.
# .copy() makes this an independent frame, so adding columns below neither
# triggers SettingWithCopyWarning nor depends on view/copy semantics.
aka=data[[i for i in data.columns if 'shiro ' not in i]].copy()
aka.columns=[i.replace('aka ','') for i in aka.columns]
aka['color']='aka'
aka['opponent']=data['shiro name']
In [ ]:
# The shiro side's view of every match: aka-specific columns are dropped and
# the 'shiro ' prefix is removed from the remaining ones.
# .copy() makes this an independent frame, so adding columns below neither
# triggers SettingWithCopyWarning nor depends on view/copy semantics.
shiro=data[[i for i in data.columns if 'aka ' not in i]].copy()
shiro.columns=[i.replace('shiro ','') for i in shiro.columns]
shiro['color']='shiro'
shiro['opponent']=data['aka name']
In [ ]:
# Stack the aka and shiro frames row-wise and renumber the index.
extended_matches=pd.concat([aka,shiro],axis=0).reset_index(drop=True)
In [ ]:
# Preview the first rows of the stacked table.
extended_matches.head()
In [ ]:
# Write the per-player match table to disk.
extended_matches.to_csv('data/extended_matches.csv')
In [ ]:
# Two copies of the per-player table, one per point slot, each exposing its
# slot under the common column name 'point'.
# rename() returns a new frame; renaming in place on a column selection
# raised SettingWithCopyWarning.
p1=extended_matches[[i for i in extended_matches.columns if i!='point2']].rename(columns={'point1':'point'})
p2=extended_matches[[i for i in extended_matches.columns if i!='point1']].rename(columns={'point2':'point'})
In [ ]:
# Stack p1 and p2 row-wise and renumber the index.
extended_points=pd.concat([p1,p2],axis=0).reset_index(drop=True)
In [ ]:
# Write the per-point table to disk.
extended_points.to_csv('data/extended_points.csv')
In [ ]:
# Show the column names of extended_points.
extended_points.columns
In [ ]:
def _shinpan_view(role,excluded):
    """Return extended_points without the columns whose name contains any of
    ``excluded``, with ``role`` replaced by 'shinpan' in the column names."""
    kept=[c for c in extended_points.columns if not any(x in c for x in excluded)]
    view=extended_points[kept]
    view.columns=[c.replace(role,'shinpan') for c in view.columns]
    return view
# one view per referee role, each exposing its referee under 'shinpan'
shu=_shinpan_view('shushin',['fukushin'])
fk1=_shinpan_view('fukushin1',['shushin','fukushin2'])
fk2=_shinpan_view('fukushin2',['shushin','fukushin1'])
extended_shinpan=pd.concat([shu,fk1,fk2],axis=0).reset_index(drop=True)
In [ ]:
# Write the per-referee table to disk.
extended_shinpan.to_csv('data/extended_shinpan.csv')
In [ ]:
# Show the column names of extended_shinpan.
extended_shinpan.columns
In [ ]:
# List the distinct values of the 'club' column.
extended_shinpan['club'].unique()
Competitor statistics
In [ ]:
# Per-competitor tallies: how often each point code was scored, plus a 'U'
# counter and the club seen on the competitor's first row.
competitors={}
# iterrows() yields the same (label, row) pairs as data.T.iteritems() did;
# DataFrame.iteritems was removed in pandas 2.0.
for _,row in data.iterrows():
    for a in ['aka ','shiro ']:
        name=row[a+'name']
        club=row[a+'club']
        if name not in competitors:
            competitors[name]={'U':0,'club':club}
        for j in ['point1','point2']:
            point=row[a+j]
            if point!='':
                competitors[name][point]=competitors[name].get(point,0)+1
        # NOTE(review): the exported cell lost its indentation; 'U' is assumed
        # to be incremented once per competitor per match -- confirm against
        # the original notebook.
        competitors[name]['U']+=1
In [ ]:
# Frame with one column per competitor and one row per tally key.
data2=pd.DataFrame(competitors)
In [ ]:
# Transpose to one row per competitor and write to disk.
data2.T.to_csv('data/competitors.csv')
In [ ]:
In [ ]:
In [ ]: